/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package com.karateca.columbiaimports;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scratch driver that exercises two small text-extraction routines against
 * hard-coded samples: regular-expression group extraction from an HTML table
 * row, and pulling the year value out of a query string.
 *
 * @author Administrator
 */
public class Test
{
    /** Query-string key, including the trailing '=', that precedes the year value. */
    private static final String YEAR_KEY = "anoprese=";

    /** Number of characters taken as the year value after {@link #YEAR_KEY}. */
    private static final int YEAR_LENGTH = 4;

    /**
     * Runs both experiments and prints their results to standard output:
     * first the number of table cells matched in the sample row, then the
     * year extracted from the sample link.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args)
    {
        String t = "<TR><TD WIDTH='64%' VALIGN='TOP' COLSPAN=25 HEIGHT=14>" +
                "<FONT SIZE=2><P>SCHOTT GLASWERKE</FONT></TD>" +
                "<TD WIDTH='18%' VALIGN='TOP' COLSPAN=6 HEIGHT=14>" +
                "<FONT SIZE=2><P ALIGN='CENTER'>1</FONT></TD>" +
                "</TR>";
        // Group 3 is the cell text; the optional group 2 lets one expression
        // cover both the plain <P> and the <P ALIGN='CENTER'> variants.
        String regEx = "(<FONT SIZE=2><P( ALIGN='CENTER')?>)(.*?)(</FONT></TD>)";

        System.out.println(findPatterns(t, regEx, 3).size());

        String link = "http://www.aduanet.gob.pe/servlet/SgCDUI2?codaduana=118&numecorre=081869&anoprese=1997&option=una&n=10";
        System.out.println(extractYear(link));
    }

    /**
     * Returns the {@link #YEAR_LENGTH} characters that follow
     * {@link #YEAR_KEY} in the given link.
     *
     * @param link the URL text to search
     * @return the characters immediately after the key
     * @throws IllegalArgumentException if the key is absent or fewer than
     *         {@link #YEAR_LENGTH} characters follow it
     */
    private static String extractYear(String link)
    {
        int keyStart = link.indexOf(YEAR_KEY);
        if (keyStart < 0)
        {
            // Without this check indexOf's -1 would silently yield an
            // unrelated slice of the link.
            throw new IllegalArgumentException(
                    "No '" + YEAR_KEY + "' parameter in link: " + link);
        }

        int valueStart = keyStart + YEAR_KEY.length();
        int valueEnd = valueStart + YEAR_LENGTH;
        if (valueEnd > link.length())
        {
            throw new IllegalArgumentException(
                    "Value after '" + YEAR_KEY + "' is shorter than "
                    + YEAR_LENGTH + " characters in link: " + link);
        }
        return link.substring(valueStart, valueEnd);
    }

    /**
     * Finds every match of a regular expression in a text and collects the
     * trimmed content of one capturing group from each match.
     * <p>
     * The returned list always holds exactly one entry per match. When the
     * requested group did not take part in a match (for example an optional
     * group), an empty string is recorded for that match.
     *
     * @param text the text to search
     * @param regEx the regular expression to apply
     * @param patternGroup index of the capturing group to collect; 0 is the
     *        whole match
     * @return the trimmed group values in match order; empty if nothing matched
     * @throws java.util.regex.PatternSyntaxException if {@code regEx} is invalid
     * @throws IndexOutOfBoundsException if a match is found and the pattern
     *         has no group with index {@code patternGroup}
     */
    private static List<String> findPatterns(String text, String regEx, int patternGroup)
    {
        List<String> results = new ArrayList<String>();
        Matcher matcher = Pattern.compile(regEx).matcher(text);
        while (matcher.find())
        {
            // group() returns null for a group that did not participate in
            // an otherwise successful match; trimming it directly would throw.
            String value = matcher.group(patternGroup);
            results.add(value == null ? "" : value.trim());
        }
        return results;
    }
}
